library(tidyverse)
library(plotly)
# Load the cleaned Gapminder CSV, drop its first column, and give the long
# descriptive indicator columns the short names used in the rest of the script.
data <- read_csv("./gapminder_clean.csv") %>%
  select(-1) %>%
  rename(
    co2em = `CO2 emissions (metric tons per capita)`,
    popden = `Population density (people per sq. km of land area)`,
    lifeExp = `Life expectancy at birth, total (years)`
  )
# Rows from 1962 where both GDP per capita and CO2 emissions are present.
data1962 <- data %>%
  filter(Year == 1962) %>%
  select(gdpPercap, co2em) %>%
  drop_na()

# Scatter plot of CO2 emissions against GDP per capita for 1962.
ggplot(data1962, aes(x = gdpPercap, y = co2em)) +
  geom_point() +
  xlab("GDP per capita") +
  ylab("CO2 emissions per capita (metric tons)")

# Correlation test between the two 1962 variables. The argument expressions
# are kept as-is because cor.test() echoes them in the "data:" line it prints.
cor.test(data1962 %>% pull(gdpPercap), data1962 %>% pull(co2em))
##
## Pearson's product-moment correlation
##
## data: data1962 %>% pull(gdpPercap) and data1962 %>% pull(co2em)
## t = 25.269, df = 106, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8934697 0.9489792
## sample estimates:
## cor
## 0.9260817
# Correlation between GDP per capita and CO2 emissions per capita, computed
# separately for each year from the rows where both values are present.
corrs <- data %>%
  select(Year, gdpPercap, co2em) %>%
  drop_na() %>%
  group_by(Year) %>%
  summarise(correlation = cor(gdpPercap, co2em))

# Pick the single row with the highest correlation so that `maxi$Year` and
# `maxi$correlation` describe the same year. Taking max() of each column
# independently would always pair the largest correlation with the latest
# year in the data, whichever year actually produced it.
# `maxi` stays a named list with elements `Year` and `correlation`.
maxi <- as.list(corrs[which.max(corrs$correlation), ])
The strongest correlation is 0.9387918 in the year 2007.
# Rows for the year stored in `maxi$Year`, limited to the columns the plot
# needs and to countries where all of them are present.
max_em_year_data <- data %>%
  filter(Year == maxi$Year) %>%
  select(gdpPercap, co2em, pop, continent, `Country Name`) %>%
  drop_na()

# Bubble chart: point size encodes population, colour encodes continent.
# `text` is not a ggplot2 aesthetic; it is only there so ggplotly() can use it
# as the hover tooltip.
fig <- ggplot(max_em_year_data, aes(x = gdpPercap, y = co2em)) +
  geom_point(
    aes(
      size = pop,
      color = continent,
      text = paste(
        "Country: ", `Country Name`,
        "\nGDP: ", gdpPercap,
        "\nCO2 emissions: ", co2em
      )
    )
  ) +
  labs(
    x = "GDP per capita",
    y = "CO2 emissions per capita (metric tons)",
    title = str_glue("GDP vs CO2 emissions per capita in ", maxi$Year)
  )

# Render as an interactive plot showing only the custom tooltip text.
ggplotly(fig, tooltip = "text")
# Mean population density per country over all available years (missing
# values ignored), sorted from densest to least dense.
data_popden <- data %>%
  select(`Country Name`, popden) %>%
  group_by(`Country Name`) %>%
  summarise(avg_popden = mean(popden, na.rm = TRUE)) %>%
  arrange(desc(avg_popden))

# Number of countries to include in the bar chart.
num_countries_shown <- 20

# Horizontal bar chart of the densest countries; reorder() sorts the bars by
# density so the densest country appears at the top.
ggplot(
  head(data_popden, n = num_countries_shown),
  aes(x = avg_popden, y = reorder(`Country Name`, avg_popden))
) +
  geom_col() +
  labs(
    x = "Average population density (people per sq. km of land)",
    y = "",
    title = str_glue(
      num_countries_shown,
      " most population dense countries 1962-2007"
    )
  )
The country with the highest average population density between 1962 and 2007 is Macao SAR, China.
# Life expectancy per country and year, plus the change relative to that
# country's 1962 value. Countries without a 1962 observation are dropped
# because they have no baseline to compare against.
# Assumes at most one row per country and year.
data_lifeExp <- data %>%
  select(`Country Name`, Year, lifeExp) %>%
  drop_na() %>%
  group_by(`Country Name`) %>%
  filter(any(Year == 1962)) %>%
  mutate(lifeExpSince1962 = lifeExp - lifeExp[Year == 1962]) %>%
  ungroup()

# Total change from 1962 to 2007 per country, largest first. Countries with
# no 2007 observation are skipped: for them `lifeExp[Year == 2007]` is empty,
# so the difference cannot be computed.
countries_highest <- data_lifeExp %>%
  group_by(`Country Name`) %>%
  filter(any(Year == 2007)) %>%
  summarise(lifeExpChange = lifeExp[Year == 2007] - lifeExp[Year == 1962]) %>%
  arrange(desc(lifeExpChange))

# Smallest change among the ten largest; comparing with `>=` below keeps any
# countries tied with the tenth one.
cutoff_lifeExpChange <- min(head(countries_highest, n = 10)$lifeExpChange)

# Full time series for the top countries. The already-computed per-country
# change is reused instead of being recalculated, and the result is ungrouped.
data_lifeExpTop <- data_lifeExp %>%
  semi_join(
    filter(countries_highest, lifeExpChange >= cutoff_lifeExpChange),
    by = "Country Name"
  )
# One line per top country showing the change in life expectancy since 1962.
fig <- ggplot(
  data_lifeExpTop,
  aes(x = Year, y = lifeExpSince1962, color = `Country Name`)
) +
  geom_line() +
  ylab("Life expectancy at birth since 1962 (years)")

# Render as an interactive plot.
ggplotly(fig)
The Maldives had the largest increase in life expectancy at birth between 1962 and 2007.